
# Store 1 (product_1 files): read the raw data.
# The issued/returned file is read as comma-separated. The actual-sales file is
# read with read.delim(), which parses it as tab-separated by default despite
# the .csv extension.
product_1_pr <- read.csv(
  "DATA/product_1_provided_return.csv",
  header = FALSE, stringsAsFactors = FALSE
)
product_1_as <- read.delim(
  "DATA/product_1_actual_sales.csv",
  header = FALSE, stringsAsFactors = FALSE
)

# Uncomment to peek at the first rows while exploring:
# product_1_pr[1:10,]
# product_1_as[1:10,]

# Issued/returned table: product id, sales point id, year, month, day,
# issued quantity, returned quantity.
colnames(product_1_pr) <- c(
  "产品号", "销售点号", "年", "月", "日", "发出数量", "回收数量"
)
# Actual-sales table: product id, sale date, return date, sales point id,
# sold quantity.
colnames(product_1_as) <- c(
  "产品号", "销售日期", "回收日期", "销售点号", "销售数量"
)
table(product_1_pr$销售点号)

# Sanity check on sales point ids: number of distinct ids in each table.
length(table(product_1_as$销售点号))
length(table(product_1_pr$销售点号))

# Sanity check on issued/returned quantities: count missing values.
table(is.na(product_1_pr$发出数量))
table(is.na(product_1_pr$回收数量))

# Show the rows whose issued quantity does NOT contain seven consecutive
# digits. A logical mask is used instead of negative grep() indices because
# x[-grep(...), ] selects zero rows when the pattern matches nothing.
# NOTE(review): a 7-digit pattern looks like it was meant for an id column
# rather than a quantity - confirm the intended check.
product_1_pr[!grepl("\\d{7}", product_1_pr$发出数量), ]
# Replace every "-" with "0", then convert to numeric.
# NOTE(review): presumably "-" is a stand-alone placeholder for "nothing";
# this would also turn a negative value such as "-5" into "05" - confirm that
# no negative quantities occur in the raw file.
product_1_pr$发出数量 <- gsub("-", "0", product_1_pr$发出数量)
product_1_pr$发出数量 <- as.numeric(product_1_pr$发出数量)

# Same check and cleanup for the returned quantity.
product_1_pr[!grepl("\\d{7}", product_1_pr$回收数量), ]
product_1_pr$回收数量 <- gsub("-", "0", product_1_pr$回收数量)
product_1_pr$回收数量 <- as.numeric(product_1_pr$回收数量)

# Collapse the year/month/day columns into a single date column. strptime()
# returns POSIXlt; it is converted to POSIXct explicitly, which is what a data
# frame stores anyway.
product_1_pr$日期 <- as.POSIXct(strptime(
  x = paste(product_1_pr$年, product_1_pr$月, product_1_pr$日, sep = "/"),
  format = "%Y/%m/%d"
))
product_1_pr <- product_1_pr[
  , c("产品号", "销售点号", "发出数量", "回收数量", "日期")
]

# Split into an issued table and a returned table.
product_1_p <- product_1_pr[, c("销售点号", "发出数量", "日期")]
product_1_r <- product_1_pr[, c("销售点号", "回收数量", "日期")]


# Store 2 (product_2 files): read the raw data. Issued and returned quantities
# come in two separate files here.
product_2_p <- read.csv(
  "DATA/product_2_provided.csv",
  header = FALSE, stringsAsFactors = FALSE
)
product_2_r <- read.csv(
  "DATA/product_2_return.csv",
  header = FALSE, stringsAsFactors = FALSE
)
# Only the first six columns are used.
product_2_p <- product_2_p[, 1:6]
product_2_r <- product_2_r[, 1:6]

# Issued table: product id, sales point id, year, month, day, issued quantity.
colnames(product_2_p) <- c("产品号", "销售点号", "年", "月", "日", "发出数量")
# Returned table: same layout with the returned quantity in the last column.
colnames(product_2_r) <- c("产品号", "销售点号", "年", "月", "日", "回收数量")

# Show the rows whose issued quantity does NOT contain seven consecutive
# digits. A logical mask is used instead of negative grep() indices because
# x[-grep(...), ] selects zero rows when the pattern matches nothing.
# NOTE(review): a 7-digit pattern looks like it was meant for an id column
# rather than a quantity - confirm the intended check.
product_2_p[!grepl("\\d{7}", product_2_p$发出数量), ]
# Replace every "-" with "0", then convert to numeric.
# NOTE(review): presumably "-" is a stand-alone placeholder; this would also
# turn "-5" into "05" - confirm that no negative quantities occur.
product_2_p$发出数量 <- gsub("-", "0", product_2_p$发出数量)
product_2_p$发出数量 <- as.numeric(product_2_p$发出数量)

# Same check and cleanup for the returned quantity.
product_2_r[!grepl("\\d{7}", product_2_r$回收数量), ]
product_2_r$回收数量 <- gsub("-", "0", product_2_r$回收数量)
product_2_r$回收数量 <- as.numeric(product_2_r$回收数量)

# Collapse year/month/day into one POSIXct date column per table.
product_2_p$日期 <- as.POSIXct(strptime(
  x = paste(product_2_p$年, product_2_p$月, product_2_p$日, sep = "/"),
  format = "%Y/%m/%d"
))
product_2_r$日期 <- as.POSIXct(strptime(
  x = paste(product_2_r$年, product_2_r$月, product_2_r$日, sep = "/"),
  format = "%Y/%m/%d"
))

# Keep sales point id, quantity and date; the product id column is dropped.
product_2_p <- product_2_p[, c("销售点号", "发出数量", "日期")]
product_2_r <- product_2_r[, c("销售点号", "回收数量", "日期")]
# Store 3 (product_3 file): read the raw data.
product_3_pr <- read.csv(
  "DATA/product_3_provided_return.csv",
  header = FALSE, stringsAsFactors = FALSE
)
# Columns: product id, sales point id, year, month, day, issued quantity,
# returned quantity.
colnames(product_3_pr) <- c(
  "产品号", "销售点号", "年", "月", "日", "发出数量", "回收数量"
)

# Show the rows whose issued quantity does NOT contain seven consecutive
# digits. A logical mask is used instead of negative grep() indices because
# x[-grep(...), ] selects zero rows when the pattern matches nothing.
# NOTE(review): a 7-digit pattern looks like it was meant for an id column
# rather than a quantity - confirm the intended check.
product_3_pr[!grepl("\\d{7}", product_3_pr$发出数量), ]
# Replace every "-" with "0", then convert to numeric.
# NOTE(review): presumably "-" is a stand-alone placeholder; this would also
# turn "-5" into "05" - confirm that no negative quantities occur.
product_3_pr$发出数量 <- gsub("-", "0", product_3_pr$发出数量)
product_3_pr$发出数量 <- as.numeric(product_3_pr$发出数量)

# Same check and cleanup for the returned quantity.
product_3_pr[!grepl("\\d{7}", product_3_pr$回收数量), ]
product_3_pr$回收数量 <- gsub("-", "0", product_3_pr$回收数量)
product_3_pr$回收数量 <- as.numeric(product_3_pr$回收数量)

# Collapse year/month/day into a single POSIXct date column.
product_3_pr$日期 <- as.POSIXct(strptime(
  x = paste(product_3_pr$年, product_3_pr$月, product_3_pr$日, sep = "/"),
  format = "%Y/%m/%d"
))
product_3_pr <- product_3_pr[
  , c("产品号", "销售点号", "发出数量", "回收数量", "日期")
]

# Split into an issued table and a returned table.
product_3_p <- product_3_pr[, c("销售点号", "发出数量", "日期")]
product_3_r <- product_3_pr[, c("销售点号", "回收数量", "日期")]



# Store 4 (product_4 file): read the raw data.
product_4_pr <- read.csv(
  "DATA/product_4_provided_return.csv",
  header = FALSE, stringsAsFactors = FALSE
)
# Columns: product id, sales point id, year, month, day, issued quantity,
# returned quantity.
colnames(product_4_pr) <- c(
  "产品号", "销售点号", "年", "月", "日", "发出数量", "回收数量"
)

# Show the rows whose issued quantity does NOT contain seven consecutive
# digits. A logical mask is used instead of negative grep() indices because
# x[-grep(...), ] selects zero rows when the pattern matches nothing.
# NOTE(review): a 7-digit pattern looks like it was meant for an id column
# rather than a quantity - confirm the intended check.
product_4_pr[!grepl("\\d{7}", product_4_pr$发出数量), ]
# Replace every "-" with "0", then convert to numeric.
# NOTE(review): presumably "-" is a stand-alone placeholder; this would also
# turn "-5" into "05" - confirm that no negative quantities occur.
product_4_pr$发出数量 <- gsub("-", "0", product_4_pr$发出数量)
product_4_pr$发出数量 <- as.numeric(product_4_pr$发出数量)

# Same check and cleanup for the returned quantity.
product_4_pr[!grepl("\\d{7}", product_4_pr$回收数量), ]
product_4_pr$回收数量 <- gsub("-", "0", product_4_pr$回收数量)
product_4_pr$回收数量 <- as.numeric(product_4_pr$回收数量)

# Collapse year/month/day into a single POSIXct date column.
product_4_pr$日期 <- as.POSIXct(strptime(
  x = paste(product_4_pr$年, product_4_pr$月, product_4_pr$日, sep = "/"),
  format = "%Y/%m/%d"
))
product_4_pr <- product_4_pr[
  , c("产品号", "销售点号", "发出数量", "回收数量", "日期")
]

# Split into an issued table and a returned table.
product_4_p <- product_4_pr[, c("销售点号", "发出数量", "日期")]
product_4_r <- product_4_pr[, c("销售点号", "回收数量", "日期")]


# Pivot a long three-column table into a wide id-by-date matrix.
#
# x: data frame addressed by position - column 1 is the sales point id,
#    column 2 the value to store (a quantity), column 3 a date-time that
#    strftime() can format.
#
# Returns a matrix with one row per distinct id (as character) and one column
# per distinct "%Y-%m-%d" date, both in order of first appearance. Cells with
# no matching input row stay 0. When an (id, date) pair occurs more than once
# the last input row wins. An input with zero rows yields a 0 x 0 matrix.
# Rows whose id or date is missing cannot be placed in the matrix; they are
# skipped and reported with a warning.
clg <- function(x) {
  ids <- as.character(x[, 1])
  days <- strftime(x = x[, 3], format = "%Y-%m-%d")
  values <- x[, 2]

  usable <- !is.na(ids) & !is.na(days)
  if (!all(usable)) {
    warning(
      sum(!usable), " row(s) with a missing id or date were skipped",
      call. = FALSE
    )
  }

  row_keys <- unique(ids[usable])
  col_keys <- unique(days[usable])
  res <- matrix(0, nrow = length(row_keys), ncol = length(col_keys))
  rownames(res) <- row_keys
  colnames(res) <- col_keys

  # One vectorised assignment: each row of the two-column character matrix is
  # a (row name, column name) pair looked up in the dimnames of res.
  if (any(usable)) {
    res[cbind(ids[usable], days[usable])] <- values[usable]
  }
  res
}

# Pivot every issued (p) and returned (r) table into a sales-point-by-date
# matrix with clg() and write each matrix to the DATA directory.

# Store 1
product_1_p_1 <- clg(product_1_p)
write.csv(product_1_p_1, file = "DATA/product_1_p_1")
product_1_r_1 <- clg(product_1_r)
write.csv(product_1_r_1, file = "DATA/product_1_r_1")

# Store 2
product_2_p_1 <- clg(product_2_p)
write.csv(product_2_p_1, file = "DATA/product_2_p_1")
product_2_r_1 <- clg(product_2_r)
write.csv(product_2_r_1, file = "DATA/product_2_r_1")

# Store 3
product_3_p_1 <- clg(product_3_p)
write.csv(product_3_p_1, file = "DATA/product_3_p_1")
product_3_r_1 <- clg(product_3_r)
write.csv(product_3_r_1, file = "DATA/product_3_r_1")

# Store 4
product_4_p_1 <- clg(product_4_p)
write.csv(product_4_p_1, file = "DATA/product_4_p_1")
product_4_r_1 <- clg(product_4_r)
write.csv(product_4_r_1, file = "DATA/product_4_r_1")










